<?php
// composer下载方式
// 先使用composer命令下载：
// composer require owner888/phpspider
// 引入加载器
//require './vendor/autoload.php';

// GitHub下载方式
require_once  "common.php";
require_once __DIR__ . '/../vendor/autoload.php';

use phpspider\core\phpspider;
use phpspider\core\log;
use phpspider\core\selector;

/* Do NOT delete this comment */
/* 不要删除这段注释 */

// Crawl configuration passed to the phpspider constructor.
// NOTE(review): the meaning of each key is defined by phpspider itself; the
// remarks below only describe the values set here.
$configs = [
    'name'    => '斗罗大陆',
    'domains' => ['v.qq.com'],

    // Logging: shown on screen and written to a file under data/.
    'log_show' => true,
    'log_file' => 'data/test_baowenwang.log',
    'log_type' => 'error,warn,debug',

    'tasknum'   => 1,
    'max_depth' => 1,

    // Export target: the `spider_baowen` database table.
    'export' => [
        'type'  => 'db',
        'table' => 'spider_baowen',
    ],

    // Database connection settings.
    'db_config' => [
        'host' => '127.0.0.1',
        'port' => '3306',
        'user' => 'root',
        'pass' => 'root',        // password
        'name' => 'php_spider',  // database name
    ],

    // The same cover page is used as scan URL, content-page pattern and
    // list-page pattern.
    'scan_urls' => [
        'https://v.qq.com/x/cover/m441e3rjq9kwpsc.html',
    ],
    'content_url_regexes' => [
        'https://v.qq.com/x/cover/m441e3rjq9kwpsc.html',
    ],
    'list_url_regexes' => [
        'https://v.qq.com/x/cover/m441e3rjq9kwpsc.html',
    ],

    // Fields to extract; both are marked as required.
    'fields' => [
        [
            'name'     => 'title',
            'selector' => "//h1[@id='title']",
            'required' => true,
        ],
        [
            'name'     => 'content',
            'selector' => "//div[@id='content']",
            'required' => true,
        ],
    ],
];

// Build the spider from the configuration above.
$spider = new phpspider($configs);

// Start callback: prints a begin marker, the two candidate list-page URLs
// (pages 1 and 2), then an end marker. The add_url() call is commented out,
// so the URLs are only echoed and never queued.
$spider->on_start = function ($spider) {
    echo "on_start回调开始:", PHP_EOL;

    foreach (range(1, 2) as $pageNo) {
        $listUrl = "http://www.cnbaowen.net/news/list-3720-{$pageNo}.html";
        echo "on_start:", $listUrl, PHP_EOL;
        // $spider->add_url($listUrl);
    }

    echo "on_start回调结束:", PHP_EOL;
};

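// List-page callback (currently disabled; kept for reference)
/*$spider->on_list_page = function ($page, $content, $spider) {
    echo "on_list_page回调开始 ".PHP_EOL;
    $text = selector::select($content, "//div[@class='player_hint']");// sample value: "每周六10点更新一集" (one new episode every Saturday at 10:00)
    p($text);
    echo "on_list_page回调结束".PHP_EOL;
    // Tell the spider not to discover further URLs to crawl on this page
    return false;
};*/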


// Content-page callback.
//
// Extracts the update-schedule hint from the player area, prints it with the
// p() helper, and dumps the raw page HTML to data/test.txt for inspection.
//
// @param mixed  $page      Page descriptor passed by phpspider (unused here).
// @param string $content   Raw HTML of the fetched page.
// @param object $phpspider The running spider instance (unused here).
// @return bool Always false — presumably tells phpspider not to discover
//              further URLs from this page; confirm against phpspider docs.
$spider->on_content_page = function ($page, $content, $phpspider) {
    echo "on_content_page回调开始 ".PHP_EOL;

    // Sample value noted by the original author: "每周六10点更新一集"
    // (one new episode every Saturday at 10:00).
    $text = selector::select($content, "//div[@class='player_hint']");

    // selector::select() may hand back a non-string (e.g. nothing matched, or
    // several nodes matched) — TODO confirm against phpspider's selector docs.
    // preg_match_all() requires a string subject and raises a TypeError on
    // PHP 8 otherwise, so only run the pattern on an actual string.
    $data = [];
    if (is_string($text)) {
        // NOTE(review): the pattern requires a trailing single quote after the
        // schedule text, and $data is not consumed anywhere yet — confirm the
        // intended pattern before relying on the matches.
        $preg = "/(每周.+?)'/";
        if (preg_match_all($preg, $text, $data) === false) {
            echo "on_content_page: preg_match_all failed".PHP_EOL;
        }
    }
    p($text);

    // Dump the raw HTML for offline inspection; report a failed write instead
    // of silently ignoring it (the path is relative to the working directory).
    if (file_put_contents("data/test.txt", $content) === false) {
        echo "on_content_page: failed to write data/test.txt".PHP_EOL;
    }

    echo "on_content_page回调结束 ".PHP_EOL;
    return false;
};


// Kick off the crawl with the callbacks registered above.
$spider->start();